Project Set-up
#Load Packages
library(tidyr)
library(leaflet)
library(ggplot2)
library(dplyr)
library(sf)
library(readxl)
library(sp)
library(htmltools)
library(RColorBrewer)
library(tidygeocoder)
library(tidycensus)
library(tigris)
library(units)
library(ggfortify)
library(broom)
library(lubridate)
library(car)
#Set Working Directory
setwd("/Users/sarahhoffman/Library/CloudStorage/OneDrive-UniversityofMaryland/data_sources")
School Location
#Create Data Table of School Locations and limit columns
public.schools <- read.csv("school_locations.csv") %>%
select(NAME, LATITUDE, LONGITUDE)
#Create Data Table of Charter Schools
charter.schools <- read.csv("charter_schools.csv") %>%
geocode(Address, method = 'osm', lat = latitude, long = longitude) %>% #Convert addresses to coordinates
select(charter.latlong, School.Name, latitude, longitude) %>%
#Limit columns
colnames(charter.schools) <- c("NAME", "LATITUDE", "LONGITUDE") #Change column names
#Manually input NA data
charter.schools[59, 2] <- 38.85632429624879
charter.schools[59, 3] <- -76.98913290464318
charter.schools[69, 2] <- 38.83197915527436
charter.schools[69, 3] <- -77.01854284512433
#Combine public and charter schools
school.locations <- rbind(public.locations, charter.locations)
#Download as csv file for future use
write.csv(school.locations, "/Users/sarahhoffman/Library/CloudStorage/OneDrive-UniversityofMaryland/data_sources/schoolcoordinates.csv", row.names = TRUE)
#Load school locations
school.locations <- read.csv("schoolcoordinates.csv")
#Create school radius
school.radius <- school.locations %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
sf::st_buffer (dist = 402)
head(school.radius)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -77.0452 ymin: 38.83486 xmax: -76.97839 ymax: 38.95189
## Geodetic CRS: WGS 84
## X NAME geometry
## 1 1 Amidon-Bowen Elementary School POLYGON ((-77.01547 38.8824...
## 2 2 Anacostia High School POLYGON ((-76.98737 38.8715...
## 3 3 Ballou High School POLYGON ((-76.99684 38.8394...
## 4 4 Bancroft Elementary School POLYGON ((-77.03796 38.9373...
## 5 5 Bard High School Early College DC POLYGON ((-76.98766 38.8415...
## 6 6 Barnard Elementary School POLYGON ((-77.01614 38.9516...
Crash Data
#Create Data Table of Crashes Involving Pedestrians
crash <- read_xlsx(path = "crashes_19_23.xlsx", sheet = "crashes_19_23_ped") %>%
#Convert crash coordinates to sf
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326)
#Match crashes to school
crash_filter <- st_filter(crash, school.radius) %>% #Filter to those that fall within radius
st_join(left = TRUE, school.radius["NAME"])
#Transform report date column to as.POSIXct
crash_filter$REPORTDATE <- as.POSIXct(crash_filter$REPORTDATE, format = "%Y/%m/%d %H:%M:%S")
#Create columns for year, month, day of week, and hours of day
crash_filter <- crash_filter %>%
mutate(year = year(crash_filter$REPORTDATE),
month = month(crash_filter$REPORTDATE, label = TRUE),
day = wday(REPORTDATE, label = TRUE),
hour = hour(REPORTDATE))
head(crash_filter)
## Simple feature collection with 6 features and 69 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -77.02971 ymin: 38.89882 xmax: -76.97417 ymax: 38.92377
## Geodetic CRS: WGS 84
## # A tibble: 6 × 70
## X Y CRIMEID CCN REPORTDATE ROUTEID MEASURE OFFSET
## <dbl> <dbl> <dbl> <dbl> <dttm> <chr> <dbl> <dbl>
## 1 -8568749. 4707150. 27906900 1.82e7 2019-01-01 01:06:54 120153… 824. 6.46
## 2 -8568749. 4707150. 27906900 1.82e7 2019-01-01 01:06:54 120153… 824. 6.46
## 3 -8568749. 4707150. 27906900 1.82e7 2019-01-01 01:06:54 120153… 824. 6.46
## 4 -8568749. 4707150. 27906900 1.82e7 2019-01-01 01:06:54 120153… 824. 6.46
## 5 -8574908. 4710759. 27906921 1.82e7 2019-01-01 01:53:57 110338… 626. 0.07
## 6 -8574908. 4710759. 27906921 1.82e7 2019-01-01 01:53:57 110338… 626. 0.07
## # ℹ 62 more variables: STREETSEGID <dbl>, ROADWAYSEGID <dbl>, FROMDATE <chr>,
## # TODATE <lgl>, ADDRESS <chr>, XCOORD <dbl>, YCOORD <dbl>, WARD <chr>,
## # EVENTID <chr>, MAR_ADDRESS <chr>, MAR_SCORE <dbl>,
## # MAJORINJURIES_BICYCLIST <dbl>, MINORINJURIES_BICYCLIST <dbl>,
## # UNKNOWNINJURIES_BICYCLIST <dbl>, FATAL_BICYCLIST <dbl>,
## # MAJORINJURIES_DRIVER <dbl>, MINORINJURIES_DRIVER <dbl>,
## # UNKNOWNINJURIES_DRIVER <dbl>, FATAL_DRIVER <dbl>, …
#Count number of crashes, number of pedestrians, and number of major and fatal injuries
crash_school <- crash_filter %>%
group_by(NAME) %>%
summarise(total_crashes = n(),
total_pedestrians = sum(TOTAL_PEDESTRIANS),
maj_fat = sum(MAJORINJURIES_PEDESTRIAN, FATAL_PEDESTRIAN))
#Join crash_school data with school location
school.crash <- school.locations %>%
full_join(crash_school, by = c("NAME"), suffix=c("",".y")) %>%
full_join(school.radius, by = c("NAME"), suffix=c("",".y"))
#Change NA to 0
school.crash <- school.crash %>%
mutate_at(vars(total_crashes, total_pedestrians, maj_fat), ~replace_na(., 0))
head(school.crash)
## X NAME LATITUDE LONGITUDE total_crashes
## 1 1 Amidon-Bowen Elementary School 38.87952 -77.01813 14
## 2 2 Anacostia High School 38.87008 -76.98308 27
## 3 3 Ballou High School 38.83851 -77.00136 6
## 4 4 Bancroft Elementary School 38.93432 -77.04055 18
## 5 5 Bard High School Early College DC 38.84498 -76.98615 21
## 6 6 Barnard Elementary School 38.94823 -77.01778 2
## total_pedestrians maj_fat geometry X.y
## 1 15 1 MULTIPOINT ((-77.02256 38.8... 1
## 2 32 1 MULTIPOINT ((-76.97855 38.8... 2
## 3 7 2 MULTIPOINT ((-77.00565 38.8... 3
## 4 22 2 MULTIPOINT ((-77.03831 38.9... 4
## 5 24 4 MULTIPOINT ((-76.98557 38.8... 5
## 6 2 0 MULTIPOINT ((-77.02107 38.9... 6
## geometry.y
## 1 POLYGON ((-77.01547 38.8824...
## 2 POLYGON ((-76.98737 38.8715...
## 3 POLYGON ((-76.99684 38.8394...
## 4 POLYGON ((-77.03796 38.9373...
## 5 POLYGON ((-76.98766 38.8415...
## 6 POLYGON ((-77.01614 38.9516...
Crash Graphs
#Look at crashes by year
crashes_year <- crash_filter %>%
group_by(year) %>%
summarize(total_crashes = n(), .groups = 'drop')
crashes_year_label <- format(round(crashes_year$total_crashes, digits = 0),big.mark=",")
ggplot(crashes_year, aes(year, total_crashes)) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Number of Pedestrian Involved Crashes by Year",
x = "Year",
y = "Number of Crashes") +
geom_text(aes(label=crashes_year_label), position=position_dodge(width=0.9), vjust=-0.25)

#Look at top schools
crashes_school <- crash_filter %>%
group_by(NAME) %>%
summarize(total_crashes = n(), .groups = 'drop') %>%
arrange(desc(total_crashes)) %>%
slice_head(n = 10)
crashes_school <- crashes_school %>%
st_drop_geometry(.)
crashes_school_radius <- school.radius %>%
right_join(crashes_school, by = "NAME")
crashes_school_label <- format(round(crashes_school$total_crashes, digits = 0),big.mark=",")
ggplot(crashes_school, aes(total_crashes, reorder(NAME, total_crashes))) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Top 10 Schools for \nPedestrian Involved Crashes",
x = "Total Crashes",
y = "School") +
scale_x_continuous(limits = c(0,130)) +
geom_text(aes(label=crashes_school_label), position=position_dodge(width=0.9), hjust=-0.25, size = 3)

dc.outline <- counties(state = "DC", year = 2022)
dc.roads <- roads(state = "DC", county = "District of Columbia", year = 2022)
ggplot() +
geom_sf(data = dc.outline, color = "grey", fill = "white") +
geom_sf(data = dc.roads, color = "grey") +
geom_sf(data = crashes_school_radius, size = 5, color = "blue", alpha = 0.5) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
) +
labs(title = "Top 10 Schools for Pedestrian Involved Crashes")

ggplot() +
geom_sf(data = dc.roads, color = "grey", fill = "white") +
geom_sf(data = crashes_school_radius, size = 5, color = "blue", alpha = 0.5) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
) +
scale_x_continuous(limits = c(-77.05,-77.00)) +
scale_y_continuous(limits = c(38.87,38.95)) +
labs(title = "Top 10 Schools for \nPedestrian Involved Crashes")

#Look at top schools for crashes by year
crashes_year_school <- crash_filter %>%
group_by(year, NAME) %>%
summarize(total_crashes = n(), .groups = 'drop') %>%
arrange(year, desc(total_crashes)) %>%
group_by(year) %>%
slice_head(n = 5) %>%
mutate (ToHighlight = ifelse(total_crashes == max(total_crashes), "yes", "no"))
school_data <- format(round(crashes_year_school$total_crashes, digits = 0),big.mark=",")
ggplot(crashes_year_school, aes(y = NAME, x = total_crashes, fill = ToHighlight)) +
geom_bar(stat = "identity") +
facet_wrap(~ year, scales = "fixed", nrow = 1) + # Create a separate plot for each year
geom_text(aes(label=school_data), position=position_dodge(width=1), vjust=-1, hjust=-0.25, size = 3) + #Add data labels
labs(title = "Top 5 Schools for \nNumber of Pedestrian-Involved \nCrashes by Year",
x = "Total Crashes",
y = "School") +
scale_x_continuous(limits = c(0,50), breaks = seq(0, 50, by = 5)) +
theme(axis.text.y = element_text( hjust = 1),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks = element_blank(),
legend.position = "none",
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())

#Look at crashes by month
crashes_month <- crash_filter %>%
group_by(month) %>%
summarize(total_crashes = n(), .groups = 'drop')
crashes_month_label <- format(round(crashes_month$total_crashes, digits = 0),big.mark=",")
ggplot(crashes_month, aes(month, total_crashes)) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Number of Pedestrian Involved Crashes by Month",
x = "Month",
y = "Number of Crashes") +
geom_text(aes(label=crashes_month_label), position=position_dodge(width=0.9), vjust=-0.25)

#Look at crashes by day of week
crashes_day <- crash_filter %>%
group_by(day) %>%
summarize(total_crashes = n(), .groups = 'drop')
crashes_day_label <- format(round(crashes_day$total_crashes, digits = 0),big.mark=",")
ggplot(crashes_day, aes(day, total_crashes)) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Number of Pedestrian Involved Crashes by Day of Week",
x = "Day of Week",
y = "Number of Crashes") +
geom_text(aes(label=crashes_day_label), position=position_dodge(width=0.9), vjust=-0.25)

#Look at crashes by hour of day
crashes_hour <- crash_filter %>%
group_by(hour) %>%
summarise(total_crashes = n()) %>%
arrange(hour)
hour_labels <- c("12", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11")
crashes_hour_label <- format(round(crashes_hour$total_crashes, digits = 0),big.mark=",")
ggplot(crashes_hour, aes(x = factor(hour), y = total_crashes)) +
geom_bar(stat = "identity", fill = "skyblue") +
labs(title = "Number of Pedestrian Involved Crashes by Hour of the Day",
x = "Hour of Day",
y = "Number of Crashes") +
geom_text(aes(label=crashes_hour_label), position=position_dodge(width=0.9), vjust=-0.25, size = 3) +
theme_minimal() +
scale_x_continuous(breaks = 0:23) + # Set x-axis to show all hours
scale_x_discrete(labels = hour_labels) +
theme(
axis.line.y = element_blank(),
axis.text.y = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()
)

Tree Location Data
#Load tree location data
tree <- read.csv("number_trees.csv") %>%
filter(!is.na(X)) %>%
filter(!is.na(Y)) %>%
sf::st_as_sf(coords = c("X", "Y"), crs = 26985) %>% #Convert x and y coordinates to longitude and latitude
st_transform(crs = 4326) %>%
mutate(lat= st_coordinates(.)[,1],
lon = st_coordinates(.)[,2]) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4326) #Convert back to sf
#Switch longitude and latitude columns
colnames(tree)[54] <- paste("longitude")
colnames(tree)[55] <- paste("latitue")
#Filter those trees that fall within schools
tree.school <- tree %>%
st_filter(school.radius) %>%
st_join(left = TRUE, school.radius["NAME"]) %>%
group_by(NAME) %>%
summarise(total_trees = n(), avg_crown = mean(CROWN_AREA, na.rm = TRUE)) #Count number of trees and average crown area
#Combine school location data with tree data
school.crash.tree <- school.crash %>%
full_join(tree.school, by = "NAME", suffix=c("",".y")) %>%
select(-ends_with(".y")) %>%
mutate(avg_crown = round(avg_crown, 0))
#Look at top ten schools for tree density
treed_school <- school.crash.tree %>%
arrange(desc(total_trees)) %>%
slice_head(n = 10)
treed_school_label <- format(round(treed_school$total_trees, digits = 0),big.mark=",")
ggplot(treed_school, aes(total_trees, reorder(NAME, total_trees))) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Top 10 Schools for \nTree Density",
x = "Total Trees",
y = "School") +
scale_x_continuous(limits = c(0,2000)) +
geom_text(aes(label=treed_school_label), position=position_dodge(width=0.9), hjust=-0.25, size = 3)

treed_school <- treed_school %>%
st_drop_geometry(.)
treed_school_radius <- school.radius %>%
right_join(treed_school, by = "NAME")
ggplot() +
geom_sf(data = dc.outline, color = "grey", fill = "white") +
geom_sf(data = dc.roads, color = "grey") +
geom_sf(data = treed_school_radius, size = 5, color = "blue", alpha = 0.5) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
) +
labs(title = "Top 10 Schools for Tree Density")

Tree Coverage Data
#Create data table of tree coverage data
tree.coverage <- read.csv("treecanopy_20_all.csv") %>%
select(GEOID, UTC_PCT) %>%
transform(GEOID = as.character(GEOID))
#Update block groups to match 2022
crosswalk <- read.csv("crosswalk.csv") %>%
select(bg2010ge, bg2020ge) %>%
transform(bg2010ge = as.character(bg2010ge),
bg2020ge = as.character(bg2020ge))
#Get block group geometries
dc.blocks <- block_groups(state = "DC", year = 2010)
#Join block group geometries with tree coverage data
tree.coverage.geometry <- tree.coverage %>%
full_join(dc.blocks, by = c("GEOID" = "GEOID10")) %>%
left_join(crosswalk, by = c("GEOID" = "bg2010ge")) %>%
st_as_sf(., crs = 4326)
#Match tree coverage tracts to schools
tree.coverage.match <- st_join(school.radius, tree.coverage.geometry, largest = TRUE)
#Combine tree coverage with crash and tree count data
school.cr.tr.co <- school.crash.tree %>%
full_join(tree.coverage.match, by = "NAME", suffix=c("",".y")) %>%
select(-ends_with(".y"))
school.cr.tr.co$bg2020ge <- as.character(school.cr.tr.co$bg2020ge)
#Look at top 10 schools for tree coverage
treec_school <- school.cr.tr.co %>%
arrange(desc(UTC_PCT)) %>%
slice_head(n = 10)
ggplot(treec_school, aes(UTC_PCT, reorder(NAME, UTC_PCT))) +
geom_bar(stat = "identity", fill = "skyblue") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Top 10 Schools for \nTree Coverage",
x = "Percent Urban Tree Coverage",
y = "School") +
scale_x_continuous(limits = c(0,100)) +
geom_text(aes(label=UTC_PCT), position=position_dodge(width=0.9), hjust=-0.25, size = 3)

treec_school <- treec_school %>%
st_drop_geometry(.)
treec_school_radius <- school.radius %>%
right_join(treec_school, by = "NAME")
ggplot() +
geom_sf(data = dc.outline, color = "grey", fill = "white") +
geom_sf(data = dc.roads, color = "grey") +
geom_sf(data = treec_school_radius, size = 5, color = "blue", alpha = 0.5) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line.y = element_blank(),
axis.text.y = element_blank(),
axis.line.x = element_blank(),
axis.text.x = element_blank(),
) +
labs(title = "Top 10 Schools for Tree Coverage")

Median Income and Population Data
#Set Census Api Key
census_api_key("9ac1cc1ce7936fa370a67b3f87d37169566116f6")
#Get population and median income data for block groups
block.data <- get_acs(geography = "block group",
state = "DC",
year = 2022,
survey = "acs5",
output = "wide",
variables = c(medincome = "B19013_001", pop = "B01003_001"),
geometry = TRUE) %>%
mutate(area = st_area(.))
#Check missing values
colSums(is.na(block.data))
## GEOID NAME medincomeE medincomeM popE popM geometry
## 0 0 83 115 0 0 0
## area
## 0
#Isolate rows with missing med income
block.data.inc.na <- block.data[is.na(block.data$medincomeE),]
#Load variables on household count by income category
inc.variables <- c("10" = "B19001_002", "10.15" = "B19001_003",
"15.20" = "B19001_004", "20.25" = "B19001_005", "25.30" = "B19001_006",
"30.35" = "B19001_007", "35.40" = "B19001_008", "40.45" = "B19001_009",
"45.50" = "B19001_010", "50.60" = "B19001_011", "60.75" = "B19001_012",
"75.100" = "B19001_013", "100.125" = "B19001_014", "125.150" = "B19001_015",
"150.200" = "B19001_016", "200" = "B19001_017")
block.inc <- get_acs(geography = "block group",
state = "DC",
year = 2022,
survey = "acs5",
output = "tidy",
variables = inc.variables)
block.inc$GEOID <- as.numeric(block.inc$GEOID)
block.inc$variable <- as.numeric(block.inc$variable)
#Find 50th percentile income category
inc.imp <- block.inc %>%
group_by(GEOID) %>%
summarise(med = median(rep(variable,estimate), na.rm = TRUE))
inc.imp$GEOID <- as.character(inc.imp$GEOID)
#Impute estimate of median income based on range
inc.imp <- inc.imp %>%
mutate(medincomeE = ifelse(med == "10", 10000, ifelse(med == "10.15", 12500, ifelse(med == "15.20", 17500, ifelse(med == "20.25", 22500, ifelse(med == "25.30", 27500, ifelse(med == "30.35", 32500, ifelse(med == "35.40", 37500, ifelse(med == "40.45", 42500, ifelse(med == "45.50", 47500, ifelse(med == "50.60", 55000, ifelse(med == "60.75", 67500, ifelse(med == "75.100", 87500, ifelse(med == "100.125", 125000, ifelse(med == "125.150", 137500, ifelse(med == "150.200", 175000, 200000))))))))))))))))
#Apply imputed values to original dataset
block.data.inc.imp <- block.data.inc.na %>%
left_join(inc.imp, by = "GEOID", suffix=c(".x","")) %>%
select(GEOID, medincomeE) %>%
st_drop_geometry(.)
block.data <- block.data %>%
left_join(block.data.inc.imp, by = "GEOID")
block.data$medincomeE.y <- ifelse(is.na(block.data$medincomeE.y), block.data$medincomeE.x, block.data$medincomeE.y)
block.data <- block.data %>%
select(-medincomeE.x)
#Check missing values again
colSums(is.na(block.data))
## GEOID NAME medincomeM popE popM area
## 0 0 115 0 0 0
## medincomeE.y geometry
## 21 0
#Calculate population density and proportion of population
block.data$area <- set_units(block.data$area, km^2)
block.data <- block.data %>%
mutate(density = (popE/area)/1000) %>%
mutate(pop_prop = popE/sum(popE))
#Join population and median income with combined data
school.cr.tr.co.ce <- school.cr.tr.co %>%
left_join(block.data, by = c("bg2020ge" = "GEOID"))
Road Classification, Speed limit, and Traffic Volume Data
#Load road classification data
road <- read.csv("roadway_classification.csv") %>%
select(SPEEDLIMITS_OB, DCFUNCTIONALCLASS, ADDRESS)
#Geocode road coordinates
road <- road %>%
geocode(ADDRESS, method = "osm", lat = latitude, lon = longitude)
#Save csv file for later use
st_write(road, "/Users/sarahhoffman/Library/CloudStorage/OneDrive-UniversityofMaryland/data_sources/roadcoordinates.csv")
#Load in csv file
road <- read.csv("roadcoordinates.csv")
#Omit NA values and convert to sf
road.clean <- na.omit(road) %>%
st_as_sf(coords = c("longitude", "latitude"), crs = 4326)
#Change 0 values for AADT to NA
road.clean$AADT[road.clean$AADT == 0] <- NA
#Create column to count major arterial and minor arterial roads
road.clean <- road.clean %>%
mutate(arterial = ifelse(DCFUNCTIONALCLASS == 14, TRUE, ifelse(DCFUNCTIONALCLASS ==16, TRUE, FALSE)))
#Filter and join roads to school radius
road.school <- st_filter(road.clean, school.radius) %>% #Filter to those that fall within radius
st_join(left = TRUE, school.radius["NAME"]) %>% #Match crashes to a school
group_by(NAME) %>%
summarise(avg_speed = mean(SPEEDLIMITS_OB), n_roads = n(), n_arterial = sum(arterial), prop_arterial = n_arterial/n_roads, total_vol = sum(AADT, na.rm = TRUE)) #Calculate average speed limit, number and proportion of arterial roads, and sum of AADT
road.school$total_vol[road.school$total_vol == 0] <- NA
#Join with school data set
school.cr.tr.co.ce.rd <- school.cr.tr.co.ce %>%
full_join(road.school, by = c("NAME.x" = "NAME"), suffix=c("",".y"))
#Select columns
school.data <- school.cr.tr.co.ce.rd %>%
select(c(NAME.x, total_crashes, total_pedestrians, maj_fat, total_trees, avg_crown, UTC_PCT, GEOID, bg2020ge, medincomeE.y, medincomeM, popE, popM, geometry.y, density, pop_prop, avg_speed, n_roads, n_arterial, prop_arterial, total_vol))
#Create column that calculates proportion of pedestrians involved that had major or fatal injuries
school.data <- school.data %>%
mutate(prop_majfat = maj_fat/total_pedestrians)
#Change NA Values to O
school.data$prop_majfat[is.na(school.data$prop_majfat)] <- 0
Safe Routes to School Data
#Load in SRTS data
school.srts <- read.csv("SRTS.csv")
#Join SRTS data with school.data
school.data <- school.data %>%
left_join(school.srts, by = "NAME.x")
Summary Statistics
#Total Crashes
summary(school.data$total_crashes)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 7.00 15.00 20.99 31.00 115.00
boxplot(school.data$total_crashes)

#Total Pedestrians Involved
summary(school.data$total_pedestrians)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.00 16.00 22.45 32.00 128.00
boxplot(school.data$total_pedestrians)

#Number of major or fatal injuries
summary(school.data$maj_fat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 2.000 2.745 4.000 15.000
boxplot(school.data$maj_fat)

#Proportion of major or fatal injuries
summary(school.data$prop_majfat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00000 0.04734 0.11111 0.12807 0.17167 0.75000
boxplot(school.data$prop_majfat)

#Total Trees
summary(school.data$total_trees)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 16.0 710.2 989.0 944.7 1164.2 1590.0
boxplot(school.data$total_trees)

#Average crown area
summary(school.data$avg_crown)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 209.0 638.8 758.0 762.8 875.0 1508.0
boxplot(school.data$avg_crown)

#Percent Urban Tree Coverage
summary(school.data$UTC_PCT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 6.70 19.46 25.61 28.12 32.93 72.95
boxplot(school.data$UTC_PCT)

#Median Income
summary(school.data$medincomeE.y)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 12500 59114 102896 113899 153897 250001 10
boxplot(school.data$medincomeE.y)

#Population
summary(school.data$popE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 926 1409 1344 1762 3385
boxplot(school.data$popE)

#Average Speed Limit
summary(school.data$avg_speed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 20.00 20.81 21.46 21.62 22.21 25.95
boxplot(school.data$avg_speed)

#Number of Roads
summary(school.data$n_roads)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 38.00 49.00 48.41 59.00 92.00
boxplot(school.data$n_roads)

#Proportion Arterial
summary(school.data$prop_arterial)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.1386 0.2375 0.2528 0.3487 0.9348
boxplot(school.data$prop_arterial)

#Traffic Volume
summary(school.data$total_vol)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 6136 69825 136250 162229 226024 611373 8
boxplot(school.data$total_vol)

#Density
summary(school.data$density)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.364 4.778 6.170 7.119 41.350
boxplot(school.data$density)

#Standard Deviation
school.data.sd <- school.data %>%
select(total_crashes, total_pedestrians, maj_fat, prop_majfat, total_trees,
avg_crown, UTC_PCT, medincomeE.y, popE,
avg_speed, n_roads, prop_arterial, total_vol) %>%
apply(2, sd, na.rm = TRUE)
school.data.sd
## total_crashes total_pedestrians maj_fat prop_majfat
## 1.863234e+01 1.982786e+01 2.852337e+00 1.214837e-01
## total_trees avg_crown UTC_PCT medincomeE.y
## 3.005419e+02 1.812980e+02 1.309376e+01 6.385593e+04
## popE avg_speed n_roads prop_arterial
## 6.001058e+02 1.095409e+00 1.628168e+01 1.696150e-01
## total_vol
## 1.150898e+05
Histogram charts
#Histogram of number of pedestrian involved crashes
ggplot(school.data, aes(x = total_crashes)) +
geom_histogram(binwidth = 5, color = "darkgrey", fill = "grey") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
scale_x_continuous(breaks = seq(0,120,10)) +
labs(title = "Distribution of Pedestrian \nInvolved Crashes in School Radius",
x = "Total Number of Pedestrian Involved Crashes",
y = "Count") +
geom_vline(aes(xintercept = mean(total_crashes)), color = "blue", alpha = 0.7) +
geom_vline(aes(xintercept = median(total_crashes)), color = "darkorange", alpha = 0.7) +
geom_text(aes(x=mean(total_crashes), label="\nMean", y=40), colour="blue", angle=90) +
geom_text(aes(x=median(total_crashes), label="Median\n", y=40), colour="darkorange", angle=90)

#Histogram of log of pedestrian involved crashes
ggplot(school.data, aes(x = log_crash)) +
geom_histogram(binwidth = 0.5, color = "darkgrey", fill = "grey") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Distribution of Pedestrian \nInvolved Crashes in School Radius (log)",
x = "Total Number of Pedestrian Involved Crashes (log)",
y = "Count") +
scale_x_continuous(breaks = seq(0,5,0.5)) +
geom_vline(aes(xintercept = mean(log_crash)), color = "blue", alpha = 0.7) +
geom_vline(aes(xintercept = median(log_crash)), color = "darkorange", alpha = 0.7) +
geom_text(aes(x=mean(log_crash), label="Mean\n", y=10), color="blue", angle=90) +
geom_text(aes(x=median(log_crash), label="\nMedian", y=10), color="darkorange", angle=90)

#Histogram of proportion of major or fatal injuries
ggplot(school.data, aes(x = prop_majfat)) +
geom_histogram(binwidth = 0.1, color = "darkgrey", fill = "grey") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Distribution of the Proportion \nof Major or Fatal Injuries",
x = "Proportion of Major or Fatal Injuries",
y = "Count") +
scale_x_continuous(breaks = seq(0,1,0.1)) +
geom_vline(aes(xintercept = mean(prop_majfat)), color = "blue", alpha = 0.7) +
geom_vline(aes(xintercept = median(prop_majfat)), color = "darkorange", alpha = 0.7) +
geom_text(aes(x=mean(prop_majfat), label="\nMean", y=50), color="blue", angle=90) +
geom_text(aes(x=median(prop_majfat), label="Median\n", y=50), color="darkorange", angle=90)

#Histogram of proportion of major or fatal injuries (log)
ggplot(school.data, aes(x = log_majfat)) +
geom_histogram(binwidth = 0.5, color = "darkgrey", fill = "grey") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Distribution of the Proportion \nof Major or Fatal Injuries (log)",
x = "Proportion of Major or Fatal Injuries (log)",
y = "Count") +
scale_x_continuous(breaks = seq(-4,0,0.5)) +
geom_vline(aes(xintercept = mean(log_majfat)), color = "blue", alpha = 0.7) +
geom_vline(aes(xintercept = median(log_majfat)), color = "darkorange", alpha = 0.7) +
geom_text(aes(x=mean(log_majfat), label="Mean\n", y=50), color="blue", angle=90) +
geom_text(aes(x=median(log_majfat), label="\nMedian", y=50), color="darkorange", angle=90)

#Histogram of tree density
ggplot(school.data, aes(x = total_trees)) +
geom_histogram(binwidth = 50, color = "darkgrey", fill = "grey") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
scale_x_continuous(breaks = seq(0,2000,250)) +
labs(title = "Distribution of Tree Density",
x = "Number of Trees",
y = "Count") +
geom_vline(aes(xintercept = mean(total_trees)), color = "blue", alpha = 0.7) +
geom_vline(aes(xintercept = median(total_trees)), color = "darkorange", alpha = 0.7) +
geom_text(aes(x=mean(total_trees), label="Mean\n", y=5), color="blue", angle=90) +
geom_text(aes(x=median(total_trees), label="\nMedian", y=5), color="darkorange", angle=90)

#Histogram of tree coverage
ggplot(school.data, aes(x = UTC_PCT)) +
geom_histogram(binwidth = 5, color = "darkgrey", fill = "grey") +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
scale_x_continuous(breaks = seq(0,80,10)) +
scale_y_continuous(breaks = seq(0,60,10)) +
labs(title = "Distribution of Percent Urban Tree Coverage",
x = "Percent Urban Tree Coverage",
y = "Count") +
geom_vline(aes(xintercept = mean(UTC_PCT)), color = "blue", alpha = 0.7) +
geom_vline(aes(xintercept = median(UTC_PCT)), color = "darkorange", alpha = 0.7) +
geom_text(aes(x=mean(UTC_PCT), label="\nMean", y=10), color="blue", angle=90) +
geom_text(aes(x=median(UTC_PCT), label="Median\n", y=10), color="darkorange", angle=90)

Linear Regression Models
#Check correlation between independent variables
correlation_matrix <- cor(school.data[, c("UTC_PCT", "total_trees", "medincomeE.y",
"density", "avg_speed",
"prop_arterial", "total_vol",
"SRTS")], use="complete.obs")
correlation_matrix
## UTC_PCT total_trees medincomeE.y density avg_speed
## UTC_PCT 1.00000000 -0.23691423 -0.02347938 -0.38277089 -0.12954815
## total_trees -0.23691423 1.00000000 0.37943532 0.24199697 -0.01226230
## medincomeE.y -0.02347938 0.37943532 1.00000000 0.03464347 0.18965796
## density -0.38277089 0.24199697 0.03464347 1.00000000 0.22591737
## avg_speed -0.12954815 -0.01226230 0.18965796 0.22591737 1.00000000
## prop_arterial -0.29783091 0.08077900 0.26803253 0.35580099 0.53343231
## total_vol -0.37634369 0.28823088 0.27587765 0.50274562 0.46490971
## SRTS -0.02779615 0.01188117 0.04989265 -0.04852689 -0.03779806
## prop_arterial total_vol SRTS
## UTC_PCT -0.2978309 -0.37634369 -0.02779615
## total_trees 0.0807790 0.28823088 0.01188117
## medincomeE.y 0.2680325 0.27587765 0.04989265
## density 0.3558010 0.50274562 -0.04852689
## avg_speed 0.5334323 0.46490971 -0.03779806
## prop_arterial 1.0000000 0.71230493 0.10164940
## total_vol 0.7123049 1.00000000 0.09242527
## SRTS 0.1016494 0.09242527 1.00000000
#Run linear regression models
#Full model, non log-transformed crashes
full.mdl <- lm(total_crashes~UTC_PCT+total_trees+log.income+density+avg_speed+prop_arterial+total_vol+SRTS, data = school.data)
#Summary and coefficients
summary(full.mdl)
##
## Call:
## lm(formula = total_crashes ~ UTC_PCT + total_trees + log.income +
## density + avg_speed + prop_arterial + total_vol + SRTS, data = school.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.035 -8.027 -1.913 5.143 60.661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.905e+01 2.883e+01 0.661 0.509523
## UTC_PCT -2.922e-01 8.353e-02 -3.498 0.000582 ***
## total_trees -2.672e-03 3.967e-03 -0.673 0.501487
## log.income -2.500e+00 1.674e+00 -1.493 0.137007
## density 4.584e-01 2.106e-01 2.176 0.030748 *
## avg_speed 1.179e+00 1.127e+00 1.046 0.296867
## prop_arterial 4.476e+00 9.695e+00 0.462 0.644841
## total_vol 7.457e-05 1.463e-05 5.097 8.26e-07 ***
## SRTS 1.435e-01 2.624e+00 0.055 0.956452
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.68 on 191 degrees of freedom
## (20 observations deleted due to missingness)
## Multiple R-squared: 0.4578, Adjusted R-squared: 0.4351
## F-statistic: 20.16 on 8 and 191 DF, p-value: < 2.2e-16
coefficients(full.mdl)
## (Intercept) UTC_PCT total_trees log.income density
## 1.905445e+01 -2.922229e-01 -2.671699e-03 -2.500252e+00 4.584156e-01
## avg_speed prop_arterial total_vol SRTS
## 1.179380e+00 4.475845e+00 7.457405e-05 1.434928e-01
full.mdl.residuals <- residuals(full.mdl) %>%
as.data.frame(.)
#Autoplot
full.mdl.plot <- autoplot(full.mdl, which = 1:3, nrow = 3, ncol = 1)
full.mdl.plot

#Distribution of residuals
ggplot(full.mdl.residuals, aes(.)) +
theme_minimal()+
labs(title = "Distribution of Residuals") +
geom_histogram()

#Full model, log-transformed crashes
full.log.mdl <- lm(log_crash~UTC_PCT+total_trees+log.income+density+avg_speed+prop_arterial+total_vol+SRTS, data = school.data)
#Summary and coefficients
summary(full.log.mdl)
##
## Call:
## lm(formula = log_crash ~ UTC_PCT + total_trees + log.income +
## density + avg_speed + prop_arterial + total_vol + SRTS, data = school.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.16551 -0.44020 0.05294 0.51447 1.62740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.908e+00 1.545e+00 2.530 0.012203 *
## UTC_PCT -2.289e-02 4.475e-03 -5.116 7.56e-07 ***
## total_trees -1.544e-04 2.125e-04 -0.727 0.468397
## log.income -2.687e-01 8.969e-02 -2.996 0.003103 **
## density 2.232e-02 1.128e-02 1.978 0.049357 *
## avg_speed 9.140e-02 6.040e-02 1.513 0.131861
## prop_arterial 1.663e-01 5.193e-01 0.320 0.749147
## total_vol 3.023e-06 7.837e-07 3.858 0.000156 ***
## SRTS -1.059e-01 1.406e-01 -0.753 0.452155
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7328 on 191 degrees of freedom
## (20 observations deleted due to missingness)
## Multiple R-squared: 0.4327, Adjusted R-squared: 0.4089
## F-statistic: 18.21 on 8 and 191 DF, p-value: < 2.2e-16
coefficients(full.log.mdl)
## (Intercept) UTC_PCT total_trees log.income density
## 3.908286e+00 -2.289305e-02 -1.544026e-04 -2.686638e-01 2.231789e-02
## avg_speed prop_arterial total_vol SRTS
## 9.139601e-02 1.663020e-01 3.023418e-06 -1.059061e-01
exp(coefficients(full.log.mdl))
## (Intercept) UTC_PCT total_trees log.income density
## 49.8135089 0.9773670 0.9998456 0.7644002 1.0225688
## avg_speed prop_arterial total_vol SRTS
## 1.0957028 1.1809297 1.0000030 0.8995091
#Autoplot
full.log.mdl.plot <- autoplot(full.log.mdl, which = 1:3, nrow = 3, ncol = 1)
full.log.mdl.plot

#Distribution of residuals
full.log.mdl.residuals <- residuals(full.log.mdl) %>%
as.data.frame(.)
ggplot(full.log.mdl.residuals, aes(.)) +
theme_minimal()+
labs(title = "Distribution of Residuals (log model)") +
geom_histogram()

#Check VIF
full.log.mdl.vif <- vif(full.log.mdl)
full.log.mdl.vif
## UTC_PCT total_trees log.income density avg_speed
## 1.268390 1.367205 1.344878 1.503438 1.478916
## prop_arterial total_vol SRTS
## 2.417257 2.709501 1.038390
#Full model, log-transformed crashes, major or fatal injuries
full.inj.mdl <- lm(prop_majfat~UTC_PCT+total_trees+log.income+density+avg_speed+prop_arterial+total_vol+SRTS, data = school.data)
#Summary and coefficients
summary(full.inj.mdl)
##
## Call:
## lm(formula = prop_majfat ~ UTC_PCT + total_trees + log.income +
## density + avg_speed + prop_arterial + total_vol + SRTS, data = school.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22414 -0.09138 -0.01255 0.05309 0.52768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.272e-01 2.498e-01 1.710 0.08886 .
## UTC_PCT 1.709e-03 7.236e-04 2.362 0.01919 *
## total_trees 2.783e-05 3.437e-05 0.810 0.41901
## log.income -4.534e-02 1.450e-02 -3.126 0.00205 **
## density -2.546e-04 1.825e-03 -0.140 0.88915
## avg_speed 6.934e-03 9.767e-03 0.710 0.47862
## prop_arterial 3.342e-02 8.398e-02 0.398 0.69113
## total_vol -6.410e-08 1.267e-07 -0.506 0.61358
## SRTS 1.291e-03 2.273e-02 0.057 0.95478
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1185 on 191 degrees of freedom
## (20 observations deleted due to missingness)
## Multiple R-squared: 0.09386, Adjusted R-squared: 0.05591
## F-statistic: 2.473 on 8 and 191 DF, p-value: 0.01425
coefficients(full.inj.mdl)
## (Intercept) UTC_PCT total_trees log.income density
## 4.271520e-01 1.708993e-03 2.783290e-05 -4.534424e-02 -2.546412e-04
## avg_speed prop_arterial total_vol SRTS
## 6.933613e-03 3.341776e-02 -6.410189e-08 1.290871e-03
#Autoplot
full.mdl.inj.plot <- autoplot(full.inj.mdl, which = 1:3, nrow = 3, ncol = 1)
full.mdl.inj.plot

#Distribution of residuals
full.inj.mdl.residuals <- residuals(full.inj.mdl) %>%
as.data.frame(.)
ggplot(full.inj.mdl.residuals, aes(.)) +
theme_minimal()+
labs(title = "Distribution of Residuals") +
geom_histogram()

#Check VIF
full.inj.mdl.vif <- vif(full.inj.mdl)
full.inj.mdl.vif
## UTC_PCT total_trees log.income density avg_speed
## 1.268390 1.367205 1.344878 1.503438 1.478916
## prop_arterial total_vol SRTS
## 2.417257 2.709501 1.038390
#Full model, log-transformed crashes, interaction between tree coverage and arterial roads
full.log.mdl.int <- lm(log_crash~UTC_PCT*prop_arterial+total_trees+log.income+density+avg_speed+total_vol+SRTS, data = school.data)
#Summary and coefficients
summary(full.log.mdl.int)
##
## Call:
## lm(formula = log_crash ~ UTC_PCT * prop_arterial + total_trees +
## log.income + density + avg_speed + total_vol + SRTS, data = school.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.16135 -0.45506 0.06426 0.49811 1.59420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.653e+00 1.638e+00 2.230 0.026923 *
## UTC_PCT -1.954e-02 8.365e-03 -2.335 0.020561 *
## prop_arterial 4.869e-01 8.521e-01 0.571 0.568368
## total_trees -1.503e-04 2.131e-04 -0.705 0.481492
## log.income -2.644e-01 9.033e-02 -2.927 0.003845 **
## density 2.129e-02 1.151e-02 1.849 0.066028 .
## avg_speed 9.696e-02 6.164e-02 1.573 0.117382
## total_vol 3.033e-06 7.855e-07 3.861 0.000155 ***
## SRTS -1.045e-01 1.409e-01 -0.742 0.459274
## UTC_PCT:prop_arterial -1.377e-02 2.897e-02 -0.475 0.635193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7343 on 190 degrees of freedom
## (20 observations deleted due to missingness)
## Multiple R-squared: 0.4334, Adjusted R-squared: 0.4065
## F-statistic: 16.15 on 9 and 190 DF, p-value: < 2.2e-16
coefficients(full.log.mdl.int)
## (Intercept) UTC_PCT prop_arterial
## 3.653121e+00 -1.953709e-02 4.869188e-01
## total_trees log.income density
## -1.503141e-04 -2.643540e-01 2.128525e-02
## avg_speed total_vol SRTS
## 9.696373e-02 3.032734e-06 -1.044805e-01
## UTC_PCT:prop_arterial
## -1.376659e-02
exp(coefficients(full.log.mdl.int))
## (Intercept) UTC_PCT prop_arterial
## 38.5949361 0.9806525 1.6272945
## total_trees log.income density
## 0.9998497 0.7677017 1.0215134
## avg_speed total_vol SRTS
## 1.1018204 1.0000030 0.9007923
## UTC_PCT:prop_arterial
## 0.9863277
#Autoplot
full.log.mdl.int.plot <- autoplot(full.log.mdl.int, which = 1:3, nrow = 3, ncol = 1)
full.log.mdl.int.plot

#Distribution of residuals
full.log.mdl.int.residuals <- residuals(full.log.mdl.int) %>%
as.data.frame(.)
ggplot(full.log.mdl.int.residuals, aes(.)) +
theme_minimal()+
labs(title = "Distribution of Residuals (interaction)") +
geom_histogram()

Scatter plot
ggplot(school.data, aes(x=UTC_PCT, y = log_crash)) +
geom_point(fill = "darkgrey", size = 3, alpha = 0.5) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Percent Urban Tree Coverage and \nPedestrain Involved Crashes in School Radius (log)",
x = "Percent Urban Tree Coverage",
y = "Total Number of Crashes (log)") +
scale_x_continuous(breaks = seq(0,80,10)) +
scale_y_continuous(breaks = seq(0,5,0.5)) +
geom_smooth(method = "lm", se = FALSE, alpha = 0.7)

ggplot(school.data, aes(x=UTC_PCT, y = prop_majfat)) +
geom_point(fill = "darkgrey", size = 3, alpha = 0.5) +
theme_minimal() +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(linetype = "solid")
) +
labs(title = "Percent Urban Tree Coverage and \nProportion of Major or Fatal Injuries",
x = "Percent Urban Tree Coverage",
y = "Proportion of Major or Fatal Pedestrian Injuries") +
scale_x_continuous(breaks = seq(0,80,10)) +
scale_y_continuous(breaks = seq(0,1,0.1)) +
geom_smooth(method = "lm", se = FALSE, alpha = 0.7)

Leaflet Map
#Create labels for schools and crashes
school.labels <- paste("<b>", school.data$NAME.x, "</b>", "<br/>",
"Tree Coverage Percent:",school.data$UTC_PCT,"%", "<br/>",
"Total Crashes: ",school.data$total_crashes, "<br/>") %>%
lapply(htmltools::HTML)
#Color coding crashes
crash.bins <- c(0,1,2,3,4,5)
crash.color <- colorBin("Purples", bins = crash.bins)
#Color coding coverage
coverage.bins <- c(0,15,30,45,75)
coverage.color <- colorBin("Greens", bins = coverage.bins)
#Color coding median income
income.bins <- c(0,20000,40000,60000,80000, 100000, 150000, 200000, 251000)
income.color <- colorBin("Blues", bins = income.bins)
#Color coding traffic volume
traffic.bins <- c(0,50000,100000,150000,200000,250000,300000,350000, 400000, 650000)
traffic.color <- colorBin("Reds", bins = traffic.bins)
#Color coding proportion arterial
art.bins <- c(0, 0.2, 0.4, 0.6, 0.7, 0.8, 1)
art.color <- colorBin("Oranges", bins = art.bins)
#Create map with school location, radius, and crashes
school.map <- leaflet() %>%
addProviderTiles(provider = "CartoDB.Positron") %>%
setView( lng = -77.0369
, lat = 38.9072
, zoom = 11) %>%
addMapPane("points", zIndex = 410) %>%
#Crashes
addPolygons(data = school.radius, label = school.labels, stroke = TRUE, fillColor = ~crash.color(school.data$log_crash), color = "#413452", fillOpacity = 0.75,
highlight = highlightOptions(weight = 5, color = "white",bringToFront = TRUE), group = "Crashes") %>%
addLegend(pal = crash.color, values = school.data$log_crash, title = "Total Number of Crashes (log)", position = "bottomright", group = "Crashes") %>%
#Coverage
addPolygons(data = school.radius, label = school.labels, stroke = TRUE, fillColor = ~coverage.color(school.data$UTC_PCT), color = "darkgreen", fillOpacity = 0.75,
highlight = highlightOptions(weight = 5, color = "white",bringToFront = TRUE), group = "Tree Coverage") %>%
addLegend(pal = coverage.color, values = school.data$UTC_PCT, title = "Percent Urban Tree Coverage", position = "bottomright", group = "Tree Coverage") %>%
#Median Income
addPolygons(data = school.radius, label = school.labels, stroke = TRUE, fillColor = ~income.color(school.data$medincomeE.y), color = "darkblue", fillOpacity = 0.75,
highlight = highlightOptions(weight = 5, color = "white",bringToFront = TRUE), group = "Median Income") %>%
addLegend(pal = income.color, values = school.data$medincomeE.y, title = "Median Income", position = "bottomright", group = "Median Income") %>%
#Traffic Volume
addPolygons(data = school.radius, label = school.labels, stroke = TRUE, fillColor = ~traffic.color(school.data$total_vol), color = "darkred", fillOpacity = 0.75,
highlight = highlightOptions(weight = 5, color = "white",bringToFront = TRUE), group = "Traffic Volume") %>%
addLegend(pal = traffic.color, values = school.data$total_vol, title = "Average Traffic Volume by Day", position = "bottomright", group = "Traffic Volume") %>%
#Proportion Arterial
addPolygons(data = school.radius, label = school.labels, stroke = TRUE, fillColor = ~art.color(school.data$prop_arterial), color = "darkorange", fillOpacity = 0.75,
highlight = highlightOptions(weight = 5, color = "white",bringToFront = TRUE), group = "Proportion Arterial") %>%
addLegend(pal = art.color, values = school.data$prop_arterial, title = "Proportion of Arterial Roads", position = "bottomright", group = "Proportion Arterial") %>%
#Crash locations
addCircleMarkers(data = crash_filter, radius = 1, color = "black", group = "Crash Locations", options = pathOptions(pane = "points")) %>%
#Overlay groups
addLayersControl(overlayGroups = c("Crashes", "Crash Locations", "Median Income", "Tree Coverage", "Traffic Volume", "Proportion Arterial")) %>%
hideGroup(c("Crashes", "Tree Coverage", "Median Income", "Traffic Volume", "Proportion Arterial"))
school.map